[Beta] Create a BenchmarkJob.

// Example: create a BenchmarkJob with the Runloop API client and print its ID.
// Uses top-level `await`, so this must run as an ES module.
import Runloop from '@runloop/api-client';

const client = new Runloop({
  bearerToken: process.env['RUNLOOP_API_KEY'], // This is the default and can be omitted
});

// Issue the create request; the resolved value is the BenchmarkJobView.
const benchmarkJobView = await client.benchmarkJobs.create();

console.log(benchmarkJobView.id);

{ "id": "<string>", "name": "<string>", "state": "initializing", "create_time_ms": 123, "job_source": { "inline_yaml": "<string>", "type": "harbor" }, "job_spec": { "scenario_ids": [ "<string>" ], "agent_configs": [ { "name": "<string>", "type": "job_agent", "agent_id": "<string>", "model_name": "<string>", "timeout_seconds": 123, "kwargs": {}, "agent_environment": { "environment_variables": {}, "secrets": {} } } ], "orchestrator_config": { "n_concurrent_trials": 123, "n_attempts": 123, "timeout_multiplier": 123, "quiet": true } }, "failure_reason": "<string>", "benchmark_outcomes": [ { "benchmark_run_id": "<string>", "agent_name": "<string>", "n_completed": 123, "n_failed": 123, "n_timeout": 123, "scenario_outcomes": [ { "scenario_definition_id": "<string>", "scenario_name": "<string>", "scenario_run_id": "<string>", "score": 123, "duration_ms": 123, "failure_reason": { "exception_type": "<string>", "exception_message": "<string>" } } ], "model_name": "<string>", "average_score": 123, "duration_ms": 123 } ], "in_progress_runs": [ { "benchmark_run_id": "<string>", "start_time_ms": 123, "agent_config": { "type": "external_api", "info": "<string>" }, "duration_ms": 123 } ] }

Authorizations

Authorization

string

header

required

Bearer authentication header of the form Bearer <token>, where <token> is your auth token.

Body

application/json

BenchmarkJobCreateParameters contains the set of parameters used to create a BenchmarkJob.

name

string | null

The name of the BenchmarkJob. If not provided, a name will be generated based on the target dataset.

spec

object

The job specification. Exactly one spec type must be set.

Option 1
Option 2
Option 3

Show child attributes

Response

200 - application/json

A BenchmarkJobView represents a benchmark job that runs a set of scenarios entirely on Runloop.

id

string

required

The ID of the BenchmarkJob.

name

string

required

The unique name of the BenchmarkJob.

state

enum<string>

required

The current state of the benchmark job.

Available options:

initializing,

queued,

running,

completed,

failed,

cancelled,

timeout

create_time_ms

integer<int64>

required

Timestamp when job was created (Unix milliseconds).

job_source

object

The source configuration that was used to create this job. Either Harbor YAML or a benchmark definition reference.

Option 1
Option 2
Option 3

Show child attributes

job_spec

object

The resolved job specification. Contains scenarios, agents, and orchestrator config.

Show child attributes

failure_reason

string | null

Failure reason if job failed.

benchmark_outcomes

object[] | null

Detailed outcome data for each benchmark run created by this job. Includes per-agent results and scenario-level details.

Show child attributes

in_progress_runs

object[] | null

Benchmark runs currently in progress for this job. Shows runs that have not yet completed.

Show child attributes